/// DemoAbstractor.cpp
// Copyright: © 1998 by Apple Computer, Inc., all rights reserved.
-
-
#include "Abstractor.h"
#include "IAExtentCorpus.h"
#include "EnglishAnalysis.h"

#ifdef __mac_os
#include <Files.h>
#include <Types.h>
#include "HFSStorage.h"
#else
#include <sys/stat.h>
#include "UFSStorage.h"
#endif


#ifdef DEBUG_NEW
#include <DebugNew.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>


#if __profile__
#include <profiler.h>
#endif
-
- #ifndef __mac_os
- typedef char* StringPtr;
- #endif
-
- void DumpInformation(const IADocumentAbstractor& abstractor, uint32 showlevels = 1);
-
- void DemoAbstractor (StringPtr file);
- void DemoAbstractor (StringPtr file) {
- #ifdef __mac_os
- FSSpec mMacFileSpec;
- short mDataForkRefNum;
- #else
- FILE* mFileSpec;
- struct stat fileStat;
- #endif
-
- char* stopwordFile = "EnglishStopwords";
- char* stemDictDoc = "EnglishSubstitutions";
- char* abbrevFile = "EnglishAbbreviations";
- #ifdef __mac_os
- IAStorage* storage = MakeHFSStorage(0,0,"\ptemp.index");
- #else
- IAStorage* storage = MakeFileStorage("temp.index");
- #endif
-
- IADeleteOnUnwind delStorage(storage);
- EnglishAnalysis* ana = new EnglishAnalysis(stopwordFile, stemDictDoc);
-
-
- uint32 numberOfSentences = 1;
- TermIndex* contextIndex = NULL;
- clock_t progFrequency = 10000;
- void* callerData = NULL;
- RankedProgressFn* progfn = NULL;
-
- #ifdef __mac_os
- OSErr iErr = FSMakeFSSpec( 0, 0, file, &mMacFileSpec);
-
- // open file
- OSErr err = FSpOpenDF(&mMacFileSpec, fsRdPerm, &mDataForkRefNum);
- if (err != noErr) {
- return;
- }
-
- // reads the file
- Handle dataHandle = nil;
-
- long fileLength;
- err = GetEOF(mDataForkRefNum, &fileLength);
- if (err != noErr) {
- return;
- }
- dataHandle = NewHandle(fileLength + 1);
- if (dataHandle == nil) return;
-
- err = SetFPos(mDataForkRefNum, fsFromStart, 0);
- if (err != noErr) {
- return;
- }
- HLock(dataHandle);
-
- err = FSRead(mDataForkRefNum, &fileLength, *dataHandle);
- if (err != noErr) {
- return;
- }
-
- *((*dataHandle)+fileLength) = '\0';
- char* buffer = (char*)(*dataHandle);
- #else
- mFileSpec = fopen(file, "r");
- if (mFileSpec == NULL) {
- return;
- }
-
- long fileLength;
- #ifndef WIN32
- lstat(file, &fileStat);
- #else
- stat(file, &fileStat);
- #endif
-
- fileLength = fileStat.st_size;
- char* buffer = (char*)malloc (fileLength + 1);
- buffer[fileLength] = '\0';
-
- fread (buffer, fileLength, 1, mFileSpec);
- #endif
- uint32 bufferLength = strlen(buffer); // length of document buffer;
-
- IAExtentParser* parser = new IAANSISentenceParser((byte*)buffer, bufferLength, abbrevFile); // NEW INTERFACE
- IADocumentAbstractor abstractor(parser, storage, ana); // NEW INTERFACE
- abstractor.Summarize(progfn, progFrequency, callerData, numberOfSentences, contextIndex);
-
- //
- // Using the GetNumberOfSentences and GetSentences functions below
- // you can loop and get the top ranked sentence;
- //
- uint32 showthismanysentences = 1;
- DumpInformation(abstractor, showthismanysentences);
-
- delete parser;
-
- #ifdef __mac_os
- HUnlock (dataHandle);
-
- // close the file
-
- err = FSClose(mDataForkRefNum);
-
- if (err != noErr) {
- return;
- }
- FlushVol(nil, mMacFileSpec.vRefNum);
- #else
- fclose(mFileSpec);
- free (buffer);
- #endif
- }
-
- void DumpInformation(const IADocumentAbstractor& abstractor, uint32 showlevel)
- {
-
- uint32 paragraphNumber = 0;
- bool firstHasBeenShown = false;
- bool showScore = true;
- bool showRank = true;
- uint32 numberTopWords = 5;
- bool showSentences = true;
-
- uint32 cnt = abstractor.GetNumberOfExtents();
- IAExtentDoc** sentences = (IAExtentDoc**)abstractor.GetExtents();
-
- for(int i=0; i< cnt; i++) {
- if(sentences[i] && (sentences[i]->GetRank() < showlevel) && (sentences[i]->GetLength() > 0)) {
- IAExtentDoc* doc = sentences[i];
-
- if(!firstHasBeenShown){
- firstHasBeenShown = true;
- paragraphNumber = doc->GetGroupNumber();
- }
-
- if( doc->GetGroupNumber() > paragraphNumber){
- paragraphNumber = doc->GetGroupNumber();
- printf ("Paragraph Seperator\r\r", 2);
- }
-
- if(showScore){
- printf("(%3.2f) ", doc->GetRankedHit()->GetScore());
- }
- if(showRank){
- printf("(%d) ", 1 + (int)doc->GetRank());
- }
- if(numberTopWords >0 ) {
- if(showlevel > 0) {
- printf ("[");
- }
-
- uint32 numberToShow = numberTopWords;
- if (numberToShow > doc->GetRankedHit()->GetMatchingTermsLen()) {
- numberToShow = doc->GetRankedHit()->GetMatchingTermsLen();
- }
-
- for(int w = 0 ; w< numberToShow; w++) {
- printf ("%s ", doc->GetRankedHit()->GetMatchingTerms()[w]->GetData());
- }
- if(showlevel >0) {
- printf ("]\n");
- }
- }
- if(showSentences) {
- #if 0
- int l = doc->GetLength();
- char* t1 = new char[l+1];
- memcpy(t1, (char*)doc->GetText() + doc->GetOffset(), l);
- t1[l] = '\0';
- printf ("Summary => %s\n", t1);
- delete [] t1;
- #endif
- uint32 l = doc->GetLength();;
- char* t1 = (char*)doc->GetExtent();
- printf ("Summary => %s\n", t1);
- IAFreeArray(t1);
- }
- }
- }
-
- }
-
- StringPtr GetFolderPath();
- StringPtr GetFolderPath()
- {
-
- char lpath[1024];
- int i = 0;
- printf ("Enter File Path >> ");
- while (!feof(stdin)) {
- char cc = fgetc(stdin);
- if (cc == '\n') break;
- lpath[i++] = cc;
- }
- lpath[i] = '\0';
- if (i == 0) return NULL;
- StringPtr folder = (StringPtr)IAMallocArraySized(byte, i + 1);
- #ifdef __mac_os
- folder[0] = i;
- memcpy(folder + 1, lpath, i);
- #else
- memcpy(folder, lpath, i);
- folder[i] = '\0';
- #endif
- return folder;
- }
-
- void main() {
-
- #if __profile__
- #ifdef powerc
- ProfilerInit(collectDetailed, PPCTimeBase, 1500, 50);
- #else
- ProfilerInit(collectSummary, microsecondsTimeBase, 1500, 50);
- #endif
- #endif
-
- StringPtr file = GetFolderPath();
-
- IATry {
- if (file)
- DemoAbstractor(file);
- }
- IACatch (const IAException& exception) {
- printf("Caught exception: \n", exception.What());
- }
-
- if (file != NULL) {
- #ifdef __mac_os
- IAFreeArraySized (file, byte, file[0] + 1);
- #else
- IAFreeArraySized (file, byte, strlen(file) + 1);
- #endif
- }
-
-
- #if __profile__
- ProfilerDump("\pDemoAccessor.prof");
- ProfilerTerm();
- #endif
-
- #ifdef IADEBUG
- IAReportMemoryUsage();
- #endif
-
- #ifdef DEBUG_NEW
- DebugNewReportLeaks();
- #endif
- }
-
-
-
-
-